from sklearn import preprocessing
from pathlib import Path
import sqlite3
from sqlite3 import Error
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from talib.abstract import *
import numpy as np
from pandas import DataFrame, read_json,to_datetime
import plotly.graph_objects as go
import plotly.express as px
pd.options.plotting.backend = "plotly"
import plotly.offline as pyo
pyo.init_notebook_mode()
BEARER_TOKEN = 'AAAAAAAAAAAAAAAAAAAAABjGTQEAAAAAdVcgzhlyHimfrHLIL9w3UbdaXb4%3DE9HCgzbdQ4r29QANZdDim9A1KkZc6DxCEaqiusn9DII5cYI2s8'
DATABASE = 'twitter_database.sqlite'
URL = 'https://api.twitter.com/2/tweets/search/recent'
FILE = 'SHIB_USDT-1d.json'
DATABASE_PATH = str(Path(".").resolve().parent.parent) + '/' + DATABASE
FILE_PATH = str(Path(".").resolve().parent.parent) + '/user_data/data/binance/' + FILE
print(DATABASE_PATH)
print(FILE_PATH)
/Users/julioibarra/Development/Desktop/freqtrade/twitter_database.sqlite /Users/julioibarra/Development/Desktop/freqtrade/user_data/data/binance/SHIB_USDT-1d.json
conn = None
try:
conn = sqlite3.connect(DATABASE_PATH)
print(sqlite3.version)
except Error as e:
print(e)
finally:
if conn:
conn.close()
2.6.0
def getlast_request(conn, currency, keyword):
cursor = conn.cursor()
query = 'SELECT * FROM Tweets WHERE currency = "'+ currency +'" GROUP BY strftime("%H:%M:%S 00:00:00", created_at, "+5 minutes")'
query = 'SELECT count(*) FROM Tweets'
cursor.execute(query)
request = cursor.fetchone()
return request
def gettables(conn):
cursor = conn.cursor()
query = 'SELECT name FROM sqlite_master WHERE type ="table" AND name NOT LIKE "sqlite_%"';
cursor.execute(query)
request = cursor.fetchone()
return request
Consultamos los tweets previamente descargados y mostramos en pantalla
currency = 'Bitcoin'
#query = 'SELECT * FROM Tweets WHERE currency = "'+ currency +'" GROUP BY strftime("%H:%M:%S 00:00:00", created_at, "+5 minutes")'
conn = sqlite3.connect(DATABASE_PATH)
df = pd.read_sql_query("SELECT * from Tweets WHERE currency = '" + currency + "'", conn)
df
| id | currency | search | twitter_id | text | created_at | |
|---|---|---|---|---|---|---|
| 0 | 1 | Bitcoin | Bitcoin | 1443912559610908674 | @ByTimReynolds I wish I contacted earlier @gre... | 2021-10-01T12:15:55.000Z |
| 1 | 2 | Bitcoin | Bitcoin | 1443912559199862787 | El Salvador has started mining #Bitcoin with p... | 2021-10-01T12:15:55.000Z |
| 2 | 3 | Bitcoin | Bitcoin | 1443912559153729546 | Bitcoin, aset kripto pertama dan terbesar berd... | 2021-10-01T12:15:55.000Z |
| 3 | 4 | Bitcoin | Bitcoin | 1443912556754710534 | #Bitcoin #Ethereum #Solana #Cardano #Btc #ADA ... | 2021-10-01T12:15:54.000Z |
| 4 | 5 | Bitcoin | Bitcoin | 1443912555630665728 | @ilhan_karasu Bitcoin yüzüklü eli öpülesi insan | 2021-10-01T12:15:54.000Z |
| ... | ... | ... | ... | ... | ... | ... |
| 501229 | 501230 | Bitcoin | Bitcoin | 1442185343160053761 | @nttcapp wonderful project.must join all.\n@Ra... | 2021-09-26T17:52:34.000Z |
| 501230 | 501231 | Bitcoin | Bitcoin | 1442185342077992960 | @PapaYaw0077 https://t.co/HvvypOi4NI | 2021-09-26T17:52:34.000Z |
| 501231 | 501232 | Bitcoin | Bitcoin | 1442185336923119623 | 😍 I am very happy to get this project ❤❤ This ... | 2021-09-26T17:52:33.000Z |
| 501232 | 501233 | Bitcoin | Bitcoin | 1442185329390223363 | C'est maintenant! En 2009, nous vous avons par... | 2021-09-26T17:52:31.000Z |
| 501233 | 501234 | Bitcoin | Bitcoin | 1442185329247612929 | Основатель Suex теряет руководящую должность и... | 2021-09-26T17:52:31.000Z |
501234 rows × 6 columns
Consultamos los tweets previamente procesados y categorizados en positivos, neutrales y negativos
query = "SELECT * from ProcessedTweets where currency = '" + currency + "'"
df_proccesed = pd.read_sql_query(query, conn)
df_proccesed['Date'] = pd.to_datetime(df_proccesed['created_at'])
df_proccesed.set_index('Date',inplace=True)
df_proccesed['total'] = 1
df_proccesed.head()
| id | localtweet_id | currency | twitter_id | positive | neutral | negative | created_at | total | |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2021-10-01 12:15:55+00:00 | 1 | 1 | Bitcoin | 1443912559610908674 | 1 | 0 | 0 | 2021-10-01T12:15:55.000Z | 1 |
| 2021-10-01 12:15:55+00:00 | 2 | 2 | Bitcoin | 1443912559199862787 | 1 | 0 | 0 | 2021-10-01T12:15:55.000Z | 1 |
| 2021-10-01 12:15:55+00:00 | 3 | 3 | Bitcoin | 1443912559153729546 | 0 | 1 | 0 | 2021-10-01T12:15:55.000Z | 1 |
| 2021-10-01 12:15:54+00:00 | 4 | 4 | Bitcoin | 1443912556754710534 | 0 | 1 | 0 | 2021-10-01T12:15:54.000Z | 1 |
| 2021-10-01 12:15:54+00:00 | 5 | 5 | Bitcoin | 1443912555630665728 | 0 | 1 | 0 | 2021-10-01T12:15:54.000Z | 1 |
Hacemos un reagrupamiento de datos en intervalos de 1 minuto.
df_proccesed_copy = df_proccesed.copy()
df_proccesed_copy.drop(columns=['id', 'localtweet_id', 'twitter_id'],
axis='columns', inplace=True)
df_proccesed_copy = df_proccesed_copy.resample('1T').sum()
#df_proccesed_copy = df_proccesed_copy.resample('30S').sum()
#df_proccesed_copy = df_proccesed_copy.resample("5T").sum()
#df_proccesed_copy = df_proccesed_copy.resample('4T').sum()
df_proccesed_copy
| positive | neutral | negative | total | |
|---|---|---|---|---|
| Date | ||||
| 2021-09-26 17:52:00+00:00 | 21 | 15 | 2 | 38 |
| 2021-09-26 17:53:00+00:00 | 32 | 32 | 8 | 72 |
| 2021-09-26 17:54:00+00:00 | 22 | 37 | 4 | 63 |
| 2021-09-26 17:55:00+00:00 | 42 | 23 | 6 | 71 |
| 2021-09-26 17:56:00+00:00 | 37 | 25 | 8 | 70 |
| ... | ... | ... | ... | ... |
| 2021-10-01 12:11:00+00:00 | 43 | 61 | 13 | 117 |
| 2021-10-01 12:12:00+00:00 | 48 | 84 | 11 | 143 |
| 2021-10-01 12:13:00+00:00 | 47 | 58 | 10 | 115 |
| 2021-10-01 12:14:00+00:00 | 57 | 66 | 13 | 136 |
| 2021-10-01 12:15:00+00:00 | 42 | 67 | 13 | 122 |
6864 rows × 4 columns
plt.figure(figsize = (15,5))
plt.plot(df_proccesed_copy.index, df_proccesed_copy.positive, color='green')
plt.plot(df_proccesed_copy.index, df_proccesed_copy.neutral, color='grey')
plt.plot(df_proccesed_copy.index, df_proccesed_copy.negative, color='red')
plt.plot(df_proccesed_copy.index, df_proccesed_copy.total, color='blue')
plt.xlabel ('Datetime')
plt.ylabel ('Feeds')
plt.legend(["Positive", "Neutral", 'Negative', 'Total'])
<matplotlib.legend.Legend at 0x7fb40f1a3b50>
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_proccesed_copy.index, y=df_proccesed_copy.positive,
mode='lines',
name='positive'))
fig.add_trace(go.Scatter(x=df_proccesed_copy.index, y=df_proccesed_copy.neutral,
mode='lines',
name='neutral'))
fig.add_trace(go.Scatter(x=df_proccesed_copy.index, y=df_proccesed_copy.negative,
mode='lines',
name='negative'))
fig.show()
positive_sma = SMA(df_proccesed_copy.positive, timeperiod=50)
neutral_sma = SMA(df_proccesed_copy.neutral, timeperiod=50)
negative_sma = SMA(df_proccesed_copy.negative, timeperiod=50)
total_sma = SMA(df_proccesed_copy.total, timeperiod=50)
plt.figure(figsize = (15,5))
plt.plot(df_proccesed_copy.index, positive_sma, color='green')
plt.plot(df_proccesed_copy.index, neutral_sma, color='grey')
plt.plot(df_proccesed_copy.index, negative_sma, color='red')
plt.plot(df_proccesed_copy.index, total_sma, color='blue')
plt.xlabel ('Datetime')
plt.ylabel ('Feeds')
plt.legend(["Positive", "Neutral", 'Negative', 'Total'])
<matplotlib.legend.Legend at 0x7fb4209e1820>
min_max_scaler = preprocessing.MinMaxScaler()
df_scale = df_proccesed_copy.copy()
df_scale.drop(columns=['total'],
axis='columns', inplace=True)
df_scale
| positive | neutral | negative | |
|---|---|---|---|
| Date | |||
| 2021-09-26 17:52:00+00:00 | 21 | 15 | 2 |
| 2021-09-26 17:53:00+00:00 | 32 | 32 | 8 |
| 2021-09-26 17:54:00+00:00 | 22 | 37 | 4 |
| 2021-09-26 17:55:00+00:00 | 42 | 23 | 6 |
| 2021-09-26 17:56:00+00:00 | 37 | 25 | 8 |
| ... | ... | ... | ... |
| 2021-10-01 12:11:00+00:00 | 43 | 61 | 13 |
| 2021-10-01 12:12:00+00:00 | 48 | 84 | 11 |
| 2021-10-01 12:13:00+00:00 | 47 | 58 | 10 |
| 2021-10-01 12:14:00+00:00 | 57 | 66 | 13 |
| 2021-10-01 12:15:00+00:00 | 42 | 67 | 13 |
6864 rows × 3 columns
maxvalue = max(df_scale.positive.max(), df_scale.neutral.max(), df_scale.negative.max())
minvalue = min(df_scale.positive.min(), df_scale.neutral.min(), df_scale.negative.min())
difmaxmin = maxvalue - minvalue
df_scale['positive_scale'] = (df_scale.positive - minvalue) / difmaxmin
df_scale['neutral_scale'] = (df_scale.neutral - minvalue) / difmaxmin
df_scale['negative_scale'] = (df_scale.negative - minvalue) / difmaxmin
df_scale
| positive | neutral | negative | positive_scale | neutral_scale | negative_scale | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2021-09-26 17:52:00+00:00 | 21 | 15 | 2 | 0.130435 | 0.093168 | 0.012422 |
| 2021-09-26 17:53:00+00:00 | 32 | 32 | 8 | 0.198758 | 0.198758 | 0.049689 |
| 2021-09-26 17:54:00+00:00 | 22 | 37 | 4 | 0.136646 | 0.229814 | 0.024845 |
| 2021-09-26 17:55:00+00:00 | 42 | 23 | 6 | 0.260870 | 0.142857 | 0.037267 |
| 2021-09-26 17:56:00+00:00 | 37 | 25 | 8 | 0.229814 | 0.155280 | 0.049689 |
| ... | ... | ... | ... | ... | ... | ... |
| 2021-10-01 12:11:00+00:00 | 43 | 61 | 13 | 0.267081 | 0.378882 | 0.080745 |
| 2021-10-01 12:12:00+00:00 | 48 | 84 | 11 | 0.298137 | 0.521739 | 0.068323 |
| 2021-10-01 12:13:00+00:00 | 47 | 58 | 10 | 0.291925 | 0.360248 | 0.062112 |
| 2021-10-01 12:14:00+00:00 | 57 | 66 | 13 | 0.354037 | 0.409938 | 0.080745 |
| 2021-10-01 12:15:00+00:00 | 42 | 67 | 13 | 0.260870 | 0.416149 | 0.080745 |
6864 rows × 6 columns
plt.figure(figsize = (15,5))
plt.plot(df_scale.index, df_scale.positive_scale, color='green')
plt.plot(df_scale.index, df_scale.neutral_scale, color='grey')
plt.plot(df_scale.index, df_scale.negative_scale, color='red')
#plt.plot(df_proccesed_copy.index, df_scaled.total, color='blue')
plt.xlabel ('Datetime')
plt.ylabel ('Feeds')
plt.legend(["Positive", "Neutral", 'Negative'])
<matplotlib.legend.Legend at 0x7fb411dc3790>
df_scale.positive
Date
2021-09-26 17:52:00+00:00 21
2021-09-26 17:53:00+00:00 32
2021-09-26 17:54:00+00:00 22
2021-09-26 17:55:00+00:00 42
2021-09-26 17:56:00+00:00 37
..
2021-10-01 12:11:00+00:00 43
2021-10-01 12:12:00+00:00 48
2021-10-01 12:13:00+00:00 47
2021-10-01 12:14:00+00:00 57
2021-10-01 12:15:00+00:00 42
Freq: T, Name: positive, Length: 6864, dtype: int64
positive_sma = SMA(df_scale.positive_scale, timeperiod=50)
neutral_sma = SMA(df_scale.neutral_scale, timeperiod=50)
negative_sma = SMA(df_scale.negative_scale, timeperiod=50)
#total_sma = SMA(df_scaled.total, timeperiod=50)
plt.figure(figsize = (15,5))
plt.plot(df_scale.index, positive_sma, color='green')
plt.plot(df_scale.index, neutral_sma, color='grey')
plt.plot(df_scale.index, negative_sma, color='red')
#plt.plot(df_proccesed_copy.index, total_sma, color='blue')
plt.xlabel ('Datetime')
plt.ylabel ('Feeds')
plt.legend(["Positive", "Neutral", 'Negative'])
<matplotlib.legend.Legend at 0x7fb41321c130>
FILE = 'BTS_USDT-1d.json'
FILE_PATH = str(Path(".").resolve().parent.parent) + '/user_data/data/binance/' + FILE
data = read_json(FILE_PATH,orient='values')
data.columns = ['Date','Open', 'High', 'Low', 'Close', 'Volume']
data = data.astype(dtype={'Open': 'float', 'High': 'float', #'Date' : 'int',
'Low': 'float', 'Close': 'float', 'Volume': 'float'})
data['Date'] = pd.to_datetime(data["Date"], unit='ms')
data.head()
data.tail()
| Date | Open | High | Low | Close | Volume | |
|---|---|---|---|---|---|---|
| 286 | 2021-10-14 | 0.04550 | 0.04862 | 0.04502 | 0.04805 | 84984449.5 |
| 287 | 2021-10-15 | 0.04807 | 0.05204 | 0.04619 | 0.04899 | 187467903.1 |
| 288 | 2021-10-16 | 0.04903 | 0.05894 | 0.04895 | 0.05016 | 333152277.6 |
| 289 | 2021-10-17 | 0.05026 | 0.05373 | 0.04828 | 0.04998 | 106213476.6 |
| 290 | 2021-10-18 | 0.04998 | 0.05185 | 0.04812 | 0.04920 | 136370488.3 |
date_max = max(df_proccesed_copy.index)
date_min = min(df_proccesed_copy.index)
print(date_max, date_min)
date_max = date_max.tz_localize(None)
date_min = date_min.tz_localize(None)
print(date_max, date_min)
2021-10-01 12:15:00+00:00 2021-09-26 17:52:00+00:00 2021-10-01 12:15:00 2021-09-26 17:52:00
data_filter = data[(data['Date'] >= date_min) & (data['Date'] <= date_max)]
data_filter
| Date | Open | High | Low | Close | Volume | |
|---|---|---|---|---|---|---|
| 269 | 2021-09-27 | 0.03941 | 0.04134 | 0.03746 | 0.03760 | 124327982.6 |
| 270 | 2021-09-28 | 0.03763 | 0.03884 | 0.03653 | 0.03686 | 58410462.4 |
| 271 | 2021-09-29 | 0.03685 | 0.03968 | 0.03621 | 0.03812 | 43562962.7 |
| 272 | 2021-09-30 | 0.03812 | 0.03994 | 0.03790 | 0.03954 | 54219331.1 |
| 273 | 2021-10-01 | 0.03951 | 0.04253 | 0.03892 | 0.04243 | 56374395.1 |
plt.figure(figsize = (15,5))
plt.plot(data_filter['Date'], data_filter['Close'], color='green')
plt.xlabel ('Datetime')
plt.ylabel ('Close')
##plt.legend(["$$"])
Text(0, 0.5, 'Close')
max_close_value = data_filter.Close.max()
min_close_value = data_filter.Close.min()
difmaxmin_bitcoin = max_close_value - min_close_value
data_filter['close_scale'] = (data_filter.Close - min_close_value) / difmaxmin_bitcoin
data_filter
/var/folders/5h/fc5kyswn7b51zcw0l0bvw2dc0000gn/T/ipykernel_88100/4015479081.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Date | Open | High | Low | Close | Volume | close_scale | |
|---|---|---|---|---|---|---|---|
| 269 | 2021-09-27 | 0.03941 | 0.04134 | 0.03746 | 0.03760 | 124327982.6 | 0.132855 |
| 270 | 2021-09-28 | 0.03763 | 0.03884 | 0.03653 | 0.03686 | 58410462.4 | 0.000000 |
| 271 | 2021-09-29 | 0.03685 | 0.03968 | 0.03621 | 0.03812 | 43562962.7 | 0.226212 |
| 272 | 2021-09-30 | 0.03812 | 0.03994 | 0.03790 | 0.03954 | 54219331.1 | 0.481149 |
| 273 | 2021-10-01 | 0.03951 | 0.04253 | 0.03892 | 0.04243 | 56374395.1 | 1.000000 |
plt.figure(figsize = (15,5))
plt.plot(data_filter['Date'], data_filter['close_scale'], color='green')
plt.xlabel ('Datetime')
plt.ylabel ('Close')
##plt.legend(["$$"])
Text(0, 0.5, 'Close')
price_sma = SMA(data_filter.close_scale, timeperiod=50)
price_sma
array([nan, nan, nan, nan, nan])
plt.figure(figsize = (15,5))
plt.plot(df_scale.index, positive_sma, color='green')
plt.plot(df_scale.index, neutral_sma, color='grey')
plt.plot(df_scale.index, negative_sma, color='red')
plt.plot(data_filter.Date, price_sma, color='blue')
#plt.plot(df_proccesed_copy.index, total_sma, color='blue')
plt.xlabel ('Datetime')
plt.ylabel ('Feeds')
plt.legend(["Positive", "Neutral", 'Negative','Price'])
<matplotlib.legend.Legend at 0x7f9a06964610>